################ Script for generating GBME output
rm(list=ls())

library(stringr)
library(igraph)
library(statnet)
library(NetData)
library(dplyr)
library(plyr)
library(foreign)
library(cshapes)
library(tidyverse)

load("replication/data/before.RData") 
load("replication/data/after.RData")
source("replication/r/gbme.r")
source("replication/r/GetDyadDist.R")




### First need to create adjacency matrices to feed into GBME function

# before
beforeAdjComb = data.frame(ego = before$SourceCountry, 
                           alter = before$TargetCountry, 
                     valence = before$simon)
beforeAdjComb = graph.data.frame(beforeAdjComb, directed=FALSE)
beforeAdjComb = get.adjacency(beforeAdjComb, attr="valence")


# after
afterAdjComb = data.frame(ego = after$SourceCountry, 
                          alter = after$TargetCountry, 
                           valence = after$simon)
afterAdjComb = graph.data.frame(afterAdjComb, directed=FALSE)
afterAdjComb = get.adjacency(afterAdjComb, attr="valence")




## add in geographic distance covariate
load("replication/data/minDist.RData")

# subset to set of countries
countries = c("630", "640", "645", "651", "652", "660", 
              "663", "666", "670", "690", "692", "620",
              "694", "696", "698", "615", "600", "616", 
              "678")

minDist2 = minDist[rownames(minDist) %in% countries, colnames(minDist) %in% countries]

# make diagonal zero
diag(minDist2) = 0

# change to country names
colnames(minDist2) <- countrycode::countrycode(colnames(minDist2), 
                                              origin = 'cown', 
                                              destination = 'country.name')

rownames(minDist2) <- countrycode::countrycode(rownames(minDist2), 
                                              origin = 'cown', 
                                              destination = 'country.name')

# reorder to match DV
minDist2 = minDist2[sort(colnames(minDist2)),sort(colnames(minDist2))]

# matrix -> array for GBME
dCov = array(minDist2, dim=c(nrow(beforeAdjComb),
                            nrow(beforeAdjComb), 1))/1000 # scale to thousands of kilometers


# add religion
# get country names
load("replication/data/before.RData")
xx = before$SourceCountry %>% unique()

# read religion data
df = 
  read_csv('replication/data/WRP_national.csv') %>% 
  filter(year == 2010) %>% 
  mutate(cname = countrycode::countrycode(state, origin = 'cown',
                                          destination = 'country.name')) %>% 
  select(cname, judgen, islmsun, islmshi) %>% 
  filter(cname %in% c(xx, 'Syrian Arab Republic', 'Iran (Islamic Republic of)'))

# fix names
df$cname[df$cname == 'Syrian Arab Republic'] <- 'Syria'
df$cname[df$cname == 'Iran (Islamic Republic of)'] <- 'Iran'


# POLITY
polity = read.csv("replication/data/p4v2013.csv")

#### preparing polity
polity = polity[, c(4, 5, 11)]
polity = polity[polity$year>=2011, ]

polity = polity[polity$country == 'Algeria' | 
                  polity$country=="Bahrain" |
                  polity$country=="Egypt" |
                  polity$country=="Iran" |
                  polity$country=="Iraq" |
                  polity$country=="Israel" |
                  polity$country=="Jordan" |
                  polity$country=="Kuwait" |
                  polity$country=="Lebanon" |
                  polity$country=="Libya" |
                  polity$country == 'Morocco' | 
                  polity$country=="Oman" |
                  polity$country=="Qatar" |
                  polity$country=="Saudi Arabia" |
                  polity$country=="Syria" |
                  polity$country == 'Tunisia' | 
                  polity$country=="Turkey" |
                  polity$country=="UAE" |
                  polity$country=="Yemen", ]


# pre- and post weighting
beforePolity=NULL
afterPolity=NULL
for (i in unique(polity$country)){
  bp = (6/21)*polity$polity2[polity$country==i & polity$year==2011] +
    (12/21)*polity$polity2[polity$country==i & polity$year==2012] +
    (3/21)*polity$polity2[polity$country==i & polity$year==2013]
  beforePolity = append(beforePolity, bp)
  ap = polity$polity2[polity$country==i & polity$year==2013]
  afterPolity = append(afterPolity, ap)
}

# Deal with Egypt's missing polity value in 2012
beforePolity[3] = (6/9)*polity$polity2[polity$country=="Egypt" & polity$year==2011] +
  (3/9)*polity$polity2[polity$country=="Egypt" & polity$year==2013]

# deal with missing Tunisia (assign 2010)
beforePolity[16] = -4
afterPolity[16] = -4

# add names back in
polity = data.frame(country = as.character(sort(unique(polity$country))), 
                    beforePol = beforePolity, 
                    afterPol = afterPolity) %>% 
  mutate(country = as.character(country))

# fix UAE
polity$country[polity$country == 'UAE'] <- 'United Arab Emirates'


# GDP
gdppc = read.csv("replication/data/e10ea80a-b174-4c5b-ac39-bf6e515ee417_Data.csv")

#### preparing GDPpc
gdppc = gdppc[, c(3, 13, 14, 15)]
gdppc = gdppc[gdppc$Country.Name=="Algeria" |
                gdppc$Country.Name=="Bahrain" |
                gdppc$Country.Name=="Egypt, Arab Rep." |
                gdppc$Country.Name=="Iran, Islamic Rep." |
                gdppc$Country.Name=="Iraq" |
                gdppc$Country.Name=="Israel" |
                gdppc$Country.Name=="Jordan" |
                gdppc$Country.Name=="Kuwait" |
                gdppc$Country.Name=="Morocco" |
                gdppc$Country.Name=="Lebanon" |
                gdppc$Country.Name=="Libya" |
                gdppc$Country.Name=="Oman" |
                gdppc$Country.Name=="Qatar" |
                gdppc$Country.Name=="Saudi Arabia" |
                gdppc$Country.Name=="Syrian Arab Republic" |
                gdppc$Country.Name=="Tunisia" |
                gdppc$Country.Name=="Turkey" |
                gdppc$Country.Name=="United Arab Emirates" |
                gdppc$Country.Name=="Yemen, Rep.", ]

# replace missing values for Syria with the most recent observation (2007)
gdppc$X2011..YR2011.[gdppc$Country.Name == 'Syrian Arab Republic'] = 2065.54
gdppc$X2012..YR2012.[gdppc$Country.Name == 'Syrian Arab Republic'] = 2065.54
gdppc$X2013..YR2013.[gdppc$Country.Name == 'Syrian Arab Republic'] = 2065.54

# weighted before and after measures
gdppc$beforeGDP = (6/21)*gdppc$X2011..YR2011. + (12/21)*gdppc$X2012..YR2012. +
  (3/21)*gdppc$X2013..YR2013.
gdppc$afterGDP = gdppc$X2013..YR2013.

# fix names
gdppc$Country.Name = as.character(gdppc$Country.Name)
gdppc$Country.Name[gdppc$Country.Name == 'Egypt, Arab Rep.'] <- "Egypt"
gdppc$Country.Name[gdppc$Country.Name == "Iran, Islamic Rep."] <- "Iran"
gdppc$Country.Name[gdppc$Country.Name == "Yemen, Rep."] <- "Yemen"
gdppc$Country.Name[gdppc$Country.Name == "Syrian Arab Republic"] <- "Syria"



# merge in
Xs = 
  left_join(before, df, by = c('SourceCountry' = 'cname')) %>% 
  select(SourceCountry, judgen:islmshi) %>% 
  distinct() %>% 
  left_join(., polity[,c('country', 'beforePol')], 
            by = c('SourceCountry' = 'country')) %>% 
  left_join(., gdppc[,c('Country.Name', 'beforeGDP')], 
            by = c('SourceCountry' = 'Country.Name')) %>% 
  select(-SourceCountry) %>% 
  mutate_all(funs(scale))
Xr = 
  left_join(before, df, by = c('TargetCountry' = 'cname')) %>% 
  select(TargetCountry, judgen:islmshi) %>% 
  distinct() %>% 
  left_join(., polity[,c('country', 'afterPol')], 
            by = c('TargetCountry' = 'country')) %>% 
  left_join(., gdppc[,c('Country.Name', 'afterGDP')], 
            by = c('TargetCountry' = 'Country.Name')) %>% 
  select(-TargetCountry) %>% 
  mutate_all(funs(scale))


#### Now use GBME function on Pre-ISIS Data
# Simon

n = nrow(beforeAdjComb)
gbme(Y=as.matrix(beforeAdjComb), 
     Xd=dCov, 
     seed = 2342341,
     fam='gaussian',
     N=matrix(1,n,n), 
     k=2, 
     Xs = as.matrix(Xs),
     Xr = as.matrix(Xr),
     directed=FALSE, 
     NS=50000, 
     odens=100, #NS is number of simulations
     owrite=TRUE, 
     ofilename='replication/output/OUT-Be-Sim', #out is the different characteristics of the process
     zwrite=TRUE, 
     zfilename='replication/output/Z-Be-Sim', #z is position in latent space
     awrite=TRUE, 
     afilename='replication/output/A-Be-Sim', #a is center-specific intercepts
     bwrite=FALSE, 
     bfilename='replication/output/B-Be-Sim' #b is the receiver
)


### Now use GBME function on Post-ISIS Data
# Simon
n = nrow(afterAdjComb)
gbme(Y=as.matrix(afterAdjComb), 
     Xd=dCov, 
     seed = 2342341,
     fam='gaussian',
     N=matrix(1,n,n), 
     k=2, 
     Xs = as.matrix(Xs),
     Xr = as.matrix(Xr),
     directed=FALSE, 
     NS=50000, 
     odens=100, #NS is number of simulations
     owrite=TRUE, 
     ofilename='replication/output/OUT-Af-Sim', #out is the different characteristics of the process
     zwrite=TRUE, 
     zfilename='replication/output/Z-Af-Sim', #z is position in latent space
     awrite=TRUE, 
     afilename='replication/output/A-Af-Sim', #a is center-specific intercepts
     bwrite=FALSE, 
     bfilename='replication/output/B-Af-Sim' #b is the receiver
)
